package com.larry.spark.rdd.transform

import org.apache.spark.{SparkConf, SparkContext}

object RDD_Oper_Test_3 {

  def main(args: Array[String]): Unit = {
    // TODO: use Spark to compute the top 3 advertisements by click count per
    // province: group the records by province first, then count within each group

    val conf = new SparkConf().setMaster("local[*]").setAppName("req")
    val sc = new SparkContext(conf)

    val lines = sc.textFile("data/agent.log")
    // Each log line: timestamp province city user advertisement
    // e.g. 1516609143867 6 7 64 16
    // Map each line to (province, (advertisement, 1)), e.g. (6, (16, 1))
    val provinceAdPairs = lines.map(
      line => {
        val fields = line.split(" ")
        (fields(1), (fields(4), 1))
      }
    )

    // Group all (advertisement, 1) pairs by province
    val groupedByProvince = provinceAdPairs.groupByKey()

    // Within each province, count the clicks per advertisement,
    // then sort descending by count and keep the top 3
    val top3PerProvince = groupedByProvince.mapValues(
      iter => {
        val adCounts = iter.groupBy(_._1).mapValues(_.size)
        adCounts.toList.sortBy(_._2)(Ordering.Int.reverse).take(3)
      }
    )

    top3PerProvince.collect().foreach(println)
    sc.stop()
  }

}
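
// A minimal alternative sketch, not part of the original exercise: the same
// top-3 result computed by counting first with reduceByKey and grouping
// afterwards, which avoids shuffling every raw (advertisement, 1) pair the
// way groupByKey does. The object name RDD_Oper_Test_3_Alt is hypothetical;
// the input path and log format are assumed to match the code above.
object RDD_Oper_Test_3_Alt {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("req")
    val sc = new SparkContext(conf)

    val lines = sc.textFile("data/agent.log")

    // ((province, advertisement), 1) -> total clicks per (province, advertisement)
    val clickCounts = lines.map(
      line => {
        val fields = line.split(" ")
        ((fields(1), fields(4)), 1)
      }
    ).reduceByKey(_ + _)

    // Regroup by province, then sort each province's (advertisement, count)
    // list descending by count and keep the top 3
    val top3PerProvince = clickCounts
      .map { case ((province, ad), count) => (province, (ad, count)) }
      .groupByKey()
      .mapValues(_.toList.sortBy(_._2)(Ordering.Int.reverse).take(3))

    top3PerProvince.collect().foreach(println)
    sc.stop()
  }

}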
